1. & 2.
# Load the per-game NFL player statistics from a local CSV export.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is on a machine with this exact directory layout.
nfl_data <- read.csv(
  "C:\\Users\\Sean\\OneDrive\\Desktop\\Grad School\\Machine Learning\\Module 2 - Intro to R\\nfl_19to22data.csv"
)

# Data Source: https://www.advancedsportsanalytics.com/nfl-raw-data
# Auto-print the loaded data frame for a first look at its contents.
nfl_data

The dataset contains statistics for passing, rushing, and receiving per game for NFL players since 2019.

# Summarise the four selected columns: player, team, pass_yds and pass_td.
summary(nfl_data[c("player", "team", "pass_yds", "pass_td")])
##     player              team              pass_yds         pass_td      
##  Length:19973       Length:19973       Min.   : -2.00   Min.   :0.0000  
##  Class :character   Class :character   1st Qu.:  0.00   1st Qu.:0.0000  
##  Mode  :character   Mode  :character   Median :  0.00   Median :0.0000  
##                                        Mean   : 20.57   Mean   :0.1317  
##                                        3rd Qu.:  0.00   3rd Qu.:0.0000  
##                                        Max.   :525.00   Max.   :5.0000

Qualitative variables: ‘player’ and ‘team’. Quantitative variables: ‘pass_yds’ and ‘pass_td’.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Overwrite pass_cmp with pass_cmp / pass_att, i.e. the completion ratio.
# NOTE(review): this replaces the original pass_cmp column in place, so
# re-running this step on the already-transformed data divides by pass_att
# again.
nfl_data <- nfl_data %>%
  mutate(pass_cmp = pass_cmp / pass_att)

# Preview the first rows, then summarise the new ratio column.
head(nfl_data)
summary(nfl_data$pass_cmp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   0.571   0.652   0.636   0.724   1.000   17963

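Changed the variable pass_cmp to hold the completion ratio (passes completed divided by passes attempted) instead of the total number of passes completed. The 17,963 NA values are presumably rows with no pass attempts, where the ratio is undefined (0/0).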

# Index plot of rushing yards: one point per row of nfl_data, in row order.
# Explicit title and axis labels replace the defaults ("Index" and the raw
# expression text), so the figure can be read on its own.
plot(
  nfl_data$rush_yds,
  main = "Rushing yards per player-game",
  xlab = "Row index",
  ylab = "Rushing yards"
)

# Same view for passing yards.
plot(
  nfl_data$pass_yds,
  main = "Passing yards per player-game",
  xlab = "Row index",
  ylab = "Passing yards"
)

Created scatterplots of rushing yards and passing yards, each plotted against row index (one point per player-game record).